** Code for Table 1 in The Effects of Changes in Local Bank Health on Household Consumption
** by Daniel Cooper and Joe Peek, ReStat 2020
/* ////////////////////////////////////////////////////////////////////////// */
clear all
set more off
set maxvar 30000

/* Equifax data to motivate/support BH results. Create real per capita loan variables 
at the MSA level and regress against BH, HP and bartik employment growth.

* The data was downloaded from RADAR (a Federal Reserve data access tool) year by year 
and each year was processed seperately. The data come from the NY Fed consumer credit panel (CCP) provided by Equifax.

Using a full 5% random sample of the entire US population (that have credit) (individual level data), 
so very large dataset and very slow do file (except for the analysis section which is collapsed to the MSA level)

Two seperate data sources: main equifax and mortgage tradeline (loan level) data which are used to
to create a better measure of home equity loans 
		
        regressions run with both main data (no HE loans) and supplemented with mortgage tradeline data (HE loans)
		different base samples as well: tradeline only includes people that have had a mortgage or HE loan.
				
**NOTE: Make sure run the tables block only after fully create both mort and nomort versions of data** */


local filename "equifax_full_all"

* The code is divided into blocks for ease of execution
********************************************************************************
* Versions
scalar mort_vars = 1            /*includes tradline (HE) data*/
scalar nomort_vars = 0              /*excludes tradeline (HE) data*/
*can only have one on at a time, need to run each for table to work

**********************
* Blocks
scalar unzip = 0                     /*unzip files from data query tool*/
scalar equifax = 0                   /*main equifax data*/
scalar mort = 0                      /*tradeline variables*/
	*will only run if mort_vars is on 
scalar msa = 0                       /*create MSA variables from zipcodes*/
scalar clean = 0                     /*clean and create variables*/

scalar analysis = 1                 /*final analysis*/
	scalar merge = 0                /*merge together BH, emp gr, HP gr with equifax (needed to run regressions)*/
	scalar tables = 1                /*create regression tables: mort and nonmort together*/
		* will only run after the above have been run on both mort_vars and nomort_vars settings 
********************************************************************************

* update with relevant paths
local data_dir /shared/Joe_Peek/bankhealth_nocamels/data
local tex_dir /shared/Joe_Peek/bankhealth_nocamels/final_tables

*Choose BH measure want to use for tables
* the BH data are read in below in the "merge" block but need to set a flag here for what to use in the table.


local re_version re_a 
local npl_version npl_a
			
if mort_vars {
	local version full
}
if nomort_vars {
	local version full_nomort
}
			
if unzip {
cd "`data_dir'/equifax/fullyrs"

forval y = 2014/2015 {
	!gzip -d -c main`y'.gz > main`y'.dta
	!gzip -d -c mort`y'.gz > mort`y'.dta
}
}
////////////////////////////////////////////////////////////////////////////////
if equifax {

forval y = 1999/2015 {

use `data_dir'/equifax/fullyrs/main`y', clear

*time variable
g year = year(qtr)
g mth = month(qtr)

drop qtr
g qtr = . 
replace qtr = 1 if mth == 3
replace qtr = 2 if mth == 6 
replace qtr = 3 if mth == 9
replace qtr = 4 if mth == 12

g time = yq(year, qtr)
format time %tq

*crosswalk from state abbreviations to fips codes
g fipstate = . 
replace fipstate = 1 if state == "AL"
replace fipstate = 2 if state == "AK"
replace fipstate = 4 if state == "AZ"
replace fipstate = 5 if state == "AR"
replace fipstate =6 if state == "CA"
replace fipstate =8 if state == "CO"
replace fipstate =9 if state == "CT"
replace fipstate =10 if state == "DE"
replace fipstate =11 if state == "DC"
replace fipstate =12 if state == "FL"
replace fipstate =13 if state == "GA"
replace fipstate =15 if state == "HI"
replace fipstate =16 if state == "ID"
replace fipstate =17 if state == "IL"
replace fipstate =18 if state == "IN"
replace fipstate = 19 if state == "IA"
replace fipstate =20 if state == "KS"
replace fipstate =21 if state == "KY"
replace fipstate =22 if state == "LA"
replace fipstate =25 if state == "MA"
replace fipstate =24 if state == "MD"
replace fipstate =23 if state == "ME"
replace fipstate =26 if state == "MI"
replace fipstate =27 if state == "MN"
replace fipstate =28 if state == "MS"
replace fipstate =29 if state == "MO"
replace fipstate =30 if state == "MT"
replace fipstate =37 if state == "NC"
replace fipstate =38 if state == "ND"
replace fipstate =33 if state == "NH"
replace fipstate =31 if state == "NE"
replace fipstate =34 if state == "NJ"
replace fipstate =35 if state == "NM"
replace fipstate =32 if state == "NV"
replace fipstate =36 if state == "NY"
replace fipstate =39 if state == "OH"
replace fipstate =40 if state == "OK"
replace fipstate =41 if state == "OR"
replace fipstate =42 if state == "PA"
replace fipstate =44 if state == "RI"
replace fipstate =45 if state == "SC"
replace fipstate =46 if state == "SD"
replace fipstate =47 if state == "TN"
replace fipstate =48 if state == "TX"
replace fipstate =49 if state == "UT"
replace fipstate = 50 if state == "VT"
replace fipstate =51 if state == "VA"
replace fipstate =53 if state == "WA"
replace fipstate =55 if state == "WI"
replace fipstate =54 if state == "WV"
replace fipstate =56 if state == "WY"

*FIPS state value labels

#delimit ; 
label define state
57 "US" 2 "AK" 1 "AL" 5 "AR" 4 "AZ" 6 "CA" 8 "CO" 9 "CT" 10 "DE" 12 "FL" 13 "GA" 
15 "HI" 19 "IA" 16 "ID" 17 "IL" 18 "IN" 20 "KS" 21 "KY" 22 "LA" 25 "MA" 24 "MD"
23 "ME" 26 "MI" 27 "MN" 29 "MO" 28 "MS"  30 "MT" 37 "NC" 38 "ND" 31 "NE" 33 "NH"
34 "NJ" 35 "NM" 32 "NV" 36 "NY" 39 "OH" 40 "OK" 41 "OR" 42 "PA" 44 "RI" 45 "SC"
46 "SD" 47 "TN" 48 "TX" 49 "UT" 51 "VA" 50 "VT" 53 "WA" 55 "WI"  54 "WV" 56 "WY"
;
label values fipstate state;
#delimit cr


drop if fipstate == . 
*non states

*replace missings with zeroes (recommendation from FAQs regarding CCP data)
foreach v of varlist crtr* cust* {
	replace `v' = 0 if `v' == .
}

destring zipcode, replace

save `data_dir'/equifax_pre_`y'_`version'_15, replace

}
}
////////////////////////////////////////////////////////////////////////////////
if mort_vars { 

local m _mort

if mort {
/*merge in mortgage definition variables so can split mortgage and home equity corretly (coding in main equifax 
 data wrong according to NY Fed) */

forval y = 1999/2015 {

use `data_dir'/equifax/fullyrs/mort`y', clear

*time variable
g year = year(qtr)
g mth = month(qtr)

drop qtr
g qtr = . 
replace qtr = 1 if mth == 3
replace qtr = 2 if mth == 6 
replace qtr = 3 if mth == 9
replace qtr = 4 if mth == 12

g time = yq(year, qtr)
format time %tq

*drop if loan not continuing from transfer
drop if continue_flag_d == "0"

*drop duplicate CIDs
duplicates tag cid time tlid, g(m)
drop if m != 0

*ECOA Codes: classify as joint or nonjoint and then subtract half if joint
g joint = 0
replace joint = 1 if ecoacode == "J" | ecoacode == "S" 
*joint, shared (share account but don't know if joint or authorized)

*single: individual account (I), undesignated (U), maker (M) - responsible unless default, on behalf of another person - financial responsibility, exclusively used by another person(B)

drop if ecoacode == "A" | ecoacode == "T" | ecoacode == "C"
*authorized use (don't have responsibility for payment), terminated (no longer have any  financial responsibility), co-maker (co-signature, only responsible if default)

replace balance = balance*.5 if joint == 1 

*collapse by same type of loans in the same time period 
collapse (sum) balance, by(cid tradeline_type_d time)

*create new variables for different types of loans
destring tradeline_type_d, replace

replace balance = 0 if balance == . 

g he_i = balance if tradeline_type_d == 2 
g he_r = balance if tradeline_type_d == 3 
drop if tradeline_type_d > 3

replace he_i = 0 if he_i == .
replace he_r = 0 if he_r == .

g he = he_i + he_r

collapse (max) mort he, by(cid time)

save `data_dir'/mort_merge_pre_`y'_`version'_15, replace

merge 1:1 cid time using `data_dir'/equifax_pre_`y'_`version'_15, keep(3) nogen

*non-merge 2 getting rid of people that have never had a mortgage/HE loan
*non-merge 1 is people in non-US states (Puerto Rico, Guam, Virgin Islands etc.)


save `data_dir'/equifax`m'_pre_`y'_`version'_15, replace
 
}
}
}
////////////////////////////////////////////////////////////////////////////////
if msa {

if mort_vars {
	local m _mort 
}

forval y = 1999/2015 {

**merge with full list of US zipcodes and throw out not real zipcodes 
import delimited using `data_dir'/us_postal_codes_0/us_postal_codes.csv, varn(1) clear

keep zipcode

merge 1:m zipcode using `data_dir'/equifax`m'_pre_`y'_`version'_15
*non-merge 2 because not real zip codes
*non-merge 1 are zip codes that are not in the equifax data

keep if _merge == 3
drop _merge

save `data_dir'/equifax_nozip_`y'_`version'_15, replace

***crosswalk zipcodes to CBSAs.
import excel using `data_dir'/ZIP_CBSA_032014.xlsx, first clear

destring CBSA, replace

rename ZIP zipcode
rename CBSA msa 

destring zipcode, replace 

*if duplicates, keep CBSA with highest ratio of residential addresses in the ZIP – 
* CBSA part to the total number of residential addresses in the entire ZIP. 
qui: egen res = group(zipcode)
levelsof res, local(re)
foreach r in `re' {
	su RES_RATIO if res == `r'
	drop if res == `r' & RES_RATIO != r(max)
}

*few where res_ratio is evenly split: randomly drop one msa
sort zipcode
g dup = .
replace dup = 1 if zipcode[_n] == zipcode[_n+1]  
drop if dup == 1

keep zipcode msa

merge 1:m zipcode using `data_dir'/equifax_nozip_`y'_`version'_15
*non-merge 1 are zipcodes that are not in the equifax sample
*non-merge 2 ?? 
	*micropolitan areas are included in the zip crosswalk. so nonmerges should be rural
	*but finding some zipcodes (ex 19850 (wilmington DE)) that are in MSAs but are not in the crosswalk 

keep if _merge == 3
drop _merge

replace msa = fipstate if msa == 99999
*these are not in a MSA so rural areas.
****MSA codes 1-56 are each states rural areas
****MSA codes 5 digits are MSAs

save `data_dir'/equifax_wrongcbsa_`y'_`version'_15, replace


*excel sheet for the real CBSA codes we should be using (crosswalk uses metropolitan division codes where available instead of CBSACodes)
import excel using `data_dir'/census_cbsa_2013_codes.xlsx, cellrange(A1:E1883) first clear

destring CBSACode, g(cbsa13)
destring MetroDivisionCode, g(msa)

collapse (first) cbsa13, by(msa)

merge 1:m msa using `data_dir'/equifax_wrongcbsa_`y'_`version'_15, keep(2 3)
replace msa = cbsa13 if _merge == 3 
drop _merge
drop cbsa13


save `data_dir'/equifax_pre2_`y'_`version'_15, replace
}
}
 /////////////////////////////////////////////////////////////////////////////////
if clean {

forval y = 1999/2015 {

use `data_dir'/equifax_pre2_`y'_`version'_15, clear

*nonmortgage variable - subtract mortgage and home equity and student loan variables (from the original dataset): 
g nonmort = crtr_attr166 - crtr_attr171 - crtr_attr172 - crtr_attr173 - crtr_attr175

*take out half if joint loan . (there are some categories for which we do not know if the loan is joint--especially loans
* without a specific designation.  We include these, there are not many but we cannot take out the joint part so there may be
* slight overcounting. 
replace nonmort = nonmort - .5*(cust_attr294 + cust_attr298 + cust_attr302 + cust_attr306 + cust_attr322 + cust_attr338)



la var nonmort "Nonmortgage Balance"

if mort_vars {
g nonmort_he = nonmort + he 
la var nonmort_he "Nonmortgage plus Home Equity Balance"
}

drop crtr* cust*

save `data_dir'/equifax_pre3_`y'_`version'_15, replace
}

*append years together
use `data_dir'/equifax_pre3_1999_`version'_15, clear 
forval y =  2000/2015 {
	append using `data_dir'/equifax_pre3_`y'_`version'_15
}

save `data_dir'/equifax_merged_`version'_15, replace 


use `data_dir'/equifax_merged_`version'_15, clear 

//////////////////
// by msa

*number of people in each MSA in each quarter 
bysort msa time: egen obs = count(cid)

if mort_vars {
	local vars nonmort nonmort_he
}
if nomort_vars {
	local vars nonmort
}

*sum all loans in each MSA 
collapse (sum) `vars' (first) obs, by(msa time) 

save `data_dir'/equifax_fin_`version'_msafin_15, replace

use `data_dir'/equifax_fin_`version'_msafin_15, clear

if mort_vars {
	local vars nonmort nonmort_he
}
if nomort_vars {
	local vars nonmort
}

*per capita loan variables 
foreach v in `vars' {
	g `v'_cap = `v'/obs
}

if mort_vars {
	local vars nonmort_cap nonmort_he_cap
}
if nomort_vars {
	local vars nonmort_cap
}

// make annual
g year = yofd(dofq(time))
collapse (mean) obs `vars', by(msa year)

*real: merge in PCE deflator 
merge m:1 year using `data_dir'/pce_bh_def
keep if _merge == 3 
drop _merge

foreach v in `vars' {
	g `v'_real = `v' / def 
}

if mort_vars {
	local vars nonmort_cap_real nonmort_he_cap_real
}
if nomort_vars {
	local vars nonmort_cap_real
}

*growth rate (1 yr)
xtset msa year
foreach v in `vars' {
	g `v'_ch = ((`v' - L.`v') / L.`v')*100
}

*winsorize: by year  
if mort_vars {
	local vars nonmort_cap_real_ch nonmort_he_cap_real_ch
}
if nomort_vars {
	local vars nonmort_cap_real_ch
}

foreach v in `vars' {
	
	g `v'_w = `v'

	forvalues y = 1999/2015 { 
		_pctile `v' if year == `y', percentiles(1,99)
		replace `v'_w = r(r1) if `v'<r(r1)  & year == `y'
		replace `v'_w = r(r2) if `v'>r(r2) & `v' != . & year == `y'
	}
}

save `data_dir'/equifax_yr_fin_`version'_15, replace

}
/////////////////////////////////////////////////////////////////////////////////

if analysis {
///////////////////////////
if merge {
***bank health***

*pull in bank health data and convert to annual and do other setup to match equifax sample

use `data_dir'/bnkh_msa_lags_fin_hmr_15.dta, replace

rename CBSA msa 
g time = yq(year, qtr)
format time %tq

xtset msa time

*want contemporary value for BH
*but database measures are lagged by 1 quarter - bring them forward 
g locshr = F.l1_LocShrM05bhc_2_msa
g npl_a_50 = F.l1_ent_npl_assetp50_msa
g npl_a_50_bhc = F.l1_M05bhc_npl_assetp50_msa

collapse (mean) npl_a_50 npl_a_50_bhc locshr, by(year msa)

*divide by 100 - so coefficents are easier to interpret
foreach bh in npl_a_50 npl_a_50_bhc { 
	replace `bh' = `bh'/100
}

*winsorize
foreach bh in npl_a_50 npl_a_50_bhc {
	g `bh'_w = `bh'
	
	forval y = 1999/2015 {	
		_pctile `bh' if year == `y', percentiles(1, 99)
		replace `bh'_w = r(r1) if `bh'<r(r1) & year == `y'
		replace `bh'_w = r(r2) if `bh'>r(r2) & `bh'!=. & year == `y'
	}	
}

*lag
xtset msa year

g l_locshr = L.locshr

g l_npl_a_50_w = L.npl_a_50_w
g l_npl_a_50_bhc_w = L.npl_a_50_bhc_w

*merge with equifax data 
merge 1:1 msa year using `data_dir'/equifax_yr_fin_`version'_15, keep(3) nogen

*this will drop all MSAs that do not have BH data.
*merge 1 is years not in equifax
*merge 2 is MSAs that do not have BH data. There are a few that are partial time periods. 

tempfile bh 
save "`bh'"



***employment growth***

* pull in employment growth (Bartik) data and also align it with equifax setup

use `data_dir'/bartik_msa_weighted_back_annual, clear

rename CBSA10 msa
rename bartik_empgr_back_msa emp_gr
xtset msa year

*winsorize
g emp_gr_w = emp_gr
forval y = 1999/2015 {
	_pctile emp_gr if year == `y', percentiles(1, 99)
	replace emp_gr_w = r(r1) if emp_gr<r(r1) & year == `y'
	replace emp_gr_w = r(r2) if emp_gr>r(r2) & emp_gr!=. & year == `y'
}
	
g l_emp_gr_w = L.emp_gr_w

*merge 
merge 1:1 msa year using "`bh'"

keep if _merge == 3 
drop _merge

tempfile emp
save "`emp'"

***HP growth***
use `data_dir'/hpi_msa_15.dta, clear

rename CBSA msa 

* annualize
collapse (mean) hpi, by(msa year)

*growth rate
xtset msa year
g hp_gr = ((hpi - L.hpi)/L.hpi)*100

*winsorize
g hp_gr_w = hp_gr
forval y = 1999/2015 {
	_pctile  hp_gr if year == `y', percentiles(1, 99)
	replace hp_gr_w = r(r1) if hp_gr<r(r1) & year == `y'
	replace hp_gr_w = r(r2) if hp_gr>r(r2) & hp_gr!=. & year == `y'
}
	
g l_hp_gr_w = L.hp_gr_w

*merge
merge 1:1 msa year using "`emp'", keep(3) nogen

la var l_npl_a_50_bhc_w "ML NPL50"
la var l_hp_gr_w "House Price Growth"
la var l_emp_gr_w "Bartik Empl. Growth"

save `data_dir'/equifax_dataset_annualfin_`version'_15, replace

}


* CREATE TABLE 1 in paper.

/////////////////////////////////////////
if tables {

*years local 
*omit 2015 from time dummies
forval y = 2000/2014 {
	local time_dum `time_dum' dum_`y'
}

// Non-Mortgage Excluding HE
use `data_dir'/equifax_dataset_annualfin_full_nomort_15, clear

*year dummies 
forval y = 2000/2015 {
	g dum_`y' = 0
	replace dum_`y' = 1 if year == `y'
}
*no 1999 because loan change eliminated it (CCP/equifax data start in 1999)

* keep only locations where ML deposits account for at least 5% of all deposits (as elsewhere) so ML banks are relevant
keep if l_locshr >= .05

eststo B: xtreg nonmort_cap_real_ch_w l_npl_a_50_bhc_w l_hp_gr_w l_emp_gr_w `time_dum' if nonmort_cap_real_ch_w != . & l_npl_a_50_bhc_w != . & l_hp_gr_w != . & l_emp_gr_w != .  , fe robust

count if e(sample) 
local obs2 = r(N)

// Non-Mortgage Including HE
use `data_dir'/equifax_dataset_annualfin_full_15, clear

forval y = 2000/2015 {
	g dum_`y' = 0
	replace dum_`y' = 1 if year == `y'
}

keep if l_locshr >= .05

eststo D: xtreg nonmort_he_cap_real_ch_w l_npl_a_50_bhc_w l_hp_gr_w l_emp_gr_w `time_dum' if nonmort_cap_real_ch_w != . & l_npl_a_50_bhc_w != . & l_hp_gr_w != . & l_emp_gr_w != .  , fe robust

count if e(sample) 
local obs4 = r(N)

#delimit ;
esttab B D using `tex_dir'/equifax_t1_fin.tex, replace starlevels(* 0.10 ** 0.05 *** 0.01) alignment(S[table-format=2.5, table-align-text-post=false])
cells(b(star fmt(%5.3fc) label("")) se(par fmt(%5.3fc) label(""))) 
keep(l_npl_a_50_bhc_w  l_hp_gr_w l_emp_gr_w)
order(l_npl_a_50_bhc_w l_hp_gr_w l_emp_gr_w) 
stats(r2_a, fmt(%5.3f) labels("Adj. R-squared")) varwidth(25) modelwidth(6)
mtitles(none) nodepvars mlabels(none) booktabs
posthead("                         &\multicolumn{1}{c}{Excluding Home Equity}&\multicolumn{1}{c}{Including Home Equity}\\
&\multicolumn{1}{c}{(1)} &\multicolumn{1}{c}{(2)}\\ \hline")
title("Bank Health and Annual Real Per Capita Nonmortgage Loan Growth") collabels(none) label long nonumbers
prefoot("\hline \multicolumn{1}{l}{Observations} & \multicolumn{1}{c}{`obs2'\phantom{*}} & \multicolumn{1}{c}{`obs4'\phantom{*}} \\")
;
#delimit cr
}
}
